/*==================================================
project:       Cleans up SISBEN for use
Author:        David L. Vargas 
E-email:       davidvar@iadb.org
url:           
Dependencies:  
----------------------------------------------------
Creation Date:    22 Aug 2022 - 22:03:23
Modification Date:   
Do-file version:    01
References:          
Output:             
==================================================*/

 /*==================================================
               0: Program set up
 ==================================================*/
 *Written on STATA 17
 drop _all
 set varabbrev off	// no variable abbreviations allowed (personal preference)
 
 /*==================================================
               1: Assets cleaning (A)
 ==================================================*/
 
 use "${dir2r}/survey_data/COL_SISBEN_ActivoA_RD_IDB.dta", clear
 
 keep if resultado == 1
 
 *----------- 1.0. aux data 
 
 //>>>> 1.0.1 prepare actual data for easy merging 
 preserve 
 replace q08_03 = 0 if q08_02 == 5  // this allows mergin pre
 
 tempfile merger 
 save `merger', replace
 restore
 
 *----------- 1.1. Long version
 /* Notes:
 Easier to create a blank panel and populate
 All fully completed are either yes or no answ. No missings 
 */
 
 //>>>> 1.1.1 Create blank panel
 * keep ID and asset 
 keep hh_id q08_01_cod 
 duplicates drop
 
 * add year 
 expand 5
 bys hh_id q08_01_cod: gen q08_02 = _n
 
 * add month 
 expand 13 if q08_02 < 5
 bys hh_id q08_01_cod q08_02: gen q08_03 = _n
 
 replace q08_03 = 0 if q08_02 == 5
 replace q08_03 = 98 if q08_03 == 13 
 
 //>>>> 1.1.2. Popualte blank panel 
 merge 1:1 hh_id q08_01_cod q08_02 q08_03  using `merger', keep(1 3) keepusing(q08_01)
 
 * Transform YES and No to dummies
 foreach v of varlist q08_01  {
 	replace `v' = 0 if `v' == 2
 	lab define l`v' 1 "Yes" 0 "No" , modify
 	lab values `v' l`v'
 	replace `v' = . if `v' == 98
 }
 
 * not matches are actualy zeros 
 replace q08_01 = 0 if _merge == 1 & q08_03 != 98
 drop _merge 
 
 * lets set before 2019 as year 0
 replace q08_02 = 0 if q08_02 ==5
 
 * Lets set an ownership variable 
 bys hh_id q08_01_cod : egen q08_01a = max(q08_01)
 lab values q08_01a lq08_01
 
 //>>>> 1.1.3. Ordering and labelling
 * order 
 sort hh_id q08_01_cod q08_02 q08_03
 	
 * add labels to make it fancy 
 lab define lq08_02 0 "Before 2019" 1 "2019" 2 "2020" 3 "2021" 4 "2022"
 lab values q08_02 lq08_02
 
 lab define lq08_03 0 "before 2019" 98 "DNK"
 lab values q08_03 lq08_03
 
 * add labels
 lab var q08_01_cod 	"Asset"
 lab var q08_02			"Year of purchase"	 
 lab var q08_03			"Month of purchase"	 
 lab var q08_01			"Household purchased asset during given month"	
 lab var q08_01a		"Household owns asset"	
 
 //>>>> 1.1.4. Final save 
 save "${dir3r}/01_survey/replica/survey_targeting_assetsA_r1.dta" , replace
 
 *----------- 1.2. Wide version
 
 use "${dir3r}/01_survey/replica/survey_targeting_assetsA_r1.dta" , clear
 
 //>>>> 1.2.1 Reorder data 
 
 * lets keep ownership and gen a date of purchase
 gen year = q08_02 + 2018
 gen date_purchase = ym(year, q08_03) if q08_02 >0 & q08_01 == 1
 format date_purchase %tm
 bys hh_id q08_01_cod: egen q08_02b = max(date_purchase)
 format q08_02b %tm
 gen yearpur = q08_02 if q08_01 == 1 
 bys hh_id q08_01_cod: egen q08_02c = max(yearpur)
 label val q08_02c `: value label q08_02'
 
 * drop data not longer needed
 keep if q08_03 == 0 
 drop q08_02 q08_03 year date_purchase yearpur
 
 * fix variables 
 rename (q08_01 q08_01a) (q08_01a q08_01)
 rename (q08_02c q08_02b) (q08_02 q08_02a)
 
 gen _aux = !missing(q08_02) & q08_02 > 0
 replace q08_01a = _aux
 drop _aux
 
 /*  Notice that:
 	  q08_01 	"Household owns"
 	  q08_01a 	"Purchased since 2019"
 	  q08_02 	"Date of purchase"  */
 	 
 //>>>> 1.2.2 Reshape data
 reshape wide q08_01 q08_01a q08_02 q08_02a, i(hh_id) j(q08_01_cod)
 
 * Appropiate renaming and labelling
 tokenize `c(alpha)'
 glo ord "" // global macro for ordering 
 forv i = 1/8 {
 	loc k = "``i''"
	rename q08_01`i' 	q08_01`k'
	rename q08_01a`i'	q08_01a`k'
	rename q08_02`i'	q08_02`k' 
	rename q08_02a`i'	q08_02a`k' 
	
	if (`i' == 1) loc a = "Refrigerator or fridge"
    if (`i' == 2) loc a = "Washing machine"
    if (`i' == 3) loc a = "Computer"
    if (`i' == 4) loc a = "Tablet"
    if (`i' == 5) loc a = "Smart phone"
    if (`i' == 6) loc a = "Motorbike or scooter"
    if (`i' == 7) loc a = "Tractor"
    if (`i' == 8) loc a = "Car"
	
	lab var q08_01`k'   "Household owns: `a'"
	lab var q08_01a`k'  "HH purchased since 2019: `a'" 
	lab var q08_02`k'   "Year of purchase: `a'" 
	lab var q08_02a`k'  "Date of purchase: `a' (if provided)" 
	
	* additional variable asset ownership pre-pandemic
	gen q08_01`k'_pre = (q08_01a`k' == 0 & q08_01`k'==1) | (!missing(q08_02`k') & q08_02a`k' <= tm(2020m1)  )
	lab var q08_01`k'_pre   "HH owned `a' before COVID" 
	lab values q08_01`k'_pre  lq08_01
	
	glo ord "$ord q08_01`k' q08_01`k'_pre q08_01a`k' q08_02`k' q08_02a`k' "
 }
 
 //>>>> 1.2.3 Ordering and merging prep
 order hh_id $ord
 
 rename hh_id ID_hogar
 
 tempfile assetsA 
 save `assetsA', replace 
 
 
 //>>>> 1.2.2 merge with main data 
 use "${dir3r}/01_survey/replica/survey_targeting_r1.dta", clear
 
 cap drop m_assetsA assets_*
 merge 1:1 ID_hogar using `assetsA', gen(m_assetsA)
 
 order $ord , after(housing_36g)
 
 rename q08_* assets_*
 
 * Store changes 
 save "${dir3r}/01_survey/replica/survey_targeting_r1.dta" , replace
 
 
 
 /*==================================================
               2: Assets cleaning (B) 
 ==================================================*/
 
 use "${dir2r}/survey_data/COL_SISBEN_ActivoB_RD_IDB.dta", clear
 
 keep if resultado == 1
 
 //>>>> 2.0.1 prepare actual data for easy merging 
 preserve 
 tempfile merger 
 save `merger', replace
 restore
 
 *----------- 2.1. Long version
 /* Notes:
 Easier to create a blank panel and populate
 All fully completed are either yes or no answ. No missings 
 */

 //>>>> 2.1.1 Create blank panel
 * keep ID and asset 
 keep hh_id q08_04_cod 
 duplicates drop
 
 * add year 
 expand 4
 bys hh_id q08_04_cod: gen q08_05 = _n
 
 * add month 
 expand 13 if q08_05 < 5
 bys hh_id q08_04_cod q08_05: gen q08_06 = _n
 
 replace q08_06 = 98 if q08_06 == 13 
 
 
 //>>>> 2.1.2. Popualte blank panel
 
 merge 1:1 hh_id q08_04_cod q08_05 q08_06 ///
 using `merger', keep(1 3) keepusing(q08_04)
 
 * Transform YES and No to dummies
 foreach v of varlist q08_04  {
 	replace `v' = 0 if `v' == 2
	lab define l`v' 1 "Yes" 0 "No" , modify
	lab values `v' l`v'
	replace `v' = . if `v' == 98
 }
 
 * not matches are actualy zeros 
 replace q08_04 = 0 if _merge == 1 & q08_06 != 98
 drop _merge 
 
 //>>>> 2.1.3. Ordering and labelling
 * order 
 sort hh_id q08_04_cod q08_05 q08_06
	
 * add labels to make it fancy 
 lab define lq08_05  1 "2019" 2 "2020" 3 "2021" 4 "2022"
 lab values q08_05 lq08_05
 
 lab define lq08_06 98 "DNK"
 lab values q08_06 lq08_06
 
 * add labels
 lab var q08_04_cod 	"Asset"
 lab var q08_05			"Year of sale"	 
 lab var q08_06			"Month of sale"	 
 lab var q08_04			"Household sold asset during given month"	
 
 //>>>> 2.1.4. Final save 
 save "${dir3r}/01_survey/replica/survey_targeting_assetsB_r1.dta" , replace
 
 *----------- 2.2. Wide version
 
 use "${dir3r}/01_survey/replica/survey_targeting_assetsB_r1.dta" , clear
 
 //>>>> 2.2.1 Reorder data 
 
 * lets keep ownership and gen a date of purchase
 gen year = q08_05 + 2018
 gen date_sold = ym(year, q08_06) if q08_04 == 1
 format date_sold %tm
 bys hh_id q08_04_cod: egen q08_05b = max(date_sold)
 format q08_05b %tm
 gen yearsol = q08_05 if q08_04 == 1 
 bys hh_id q08_04_cod: egen q08_05c = max(yearsol)
 label val q08_05c `: value label q08_05'
 
 * drop data not longer needed
 keep if q08_06 == 1 & q08_05 == 1
 drop q08_05 q08_06 year date_sold yearsol
 
 * fix variables 
 rename (q08_05c q08_05b) (q08_05 q08_05a)
 
 gen _aux = !missing(q08_05)
 replace q08_04 = _aux
 drop _aux
 
 /*  Notice that:
 	  q08_04 	"Household sold asset at some point"
 	  q08_05 	"Sold year"
 	  q08_05a 	"Date of sale"  */
 	 
 //>>>> 1.2.2 Reshape data
 reshape wide q08_04 q08_05 q08_05a, i(hh_id) j(q08_04_cod)
 
 * Appropiate renaming and labelling
 tokenize `c(alpha)'
 glo ord "" // global macro for ordering 
 forv i = 1/8 {
 	loc k = "``i''"
	rename q08_04`i' 	q08_04`k'
	rename q08_05`i'	q08_05`k' 
	rename q08_05a`i'	q08_05a`k' 
	
	if (`i' == 1) loc a = "Refrigerator or fridge"
    if (`i' == 2) loc a = "Washing machine"
    if (`i' == 3) loc a = "Computer"
    if (`i' == 4) loc a = "Tablet"
    if (`i' == 5) loc a = "Smart phone"
    if (`i' == 6) loc a = "Motorbike or scooter"
    if (`i' == 7) loc a = "Tractor"
    if (`i' == 8) loc a = "Car"
	
	lab var q08_04`k'   "HH sold since 2019: `a'"
	lab var q08_05`k'   "Year of sale: `a'" 
	lab var q08_05a`k'  "Date of sale: `a' (if provided)" 
	
	glo ord "$ord q08_04`k' q08_05`k' q08_05a`k' "
 }
 
 //>>>> 1.2.3 Ordering and merging prep
 order hh_id $ord
 
 rename hh_id ID_hogar
 
 tempfile assetsB 
 save `assetsB', replace 
 
 
 //>>>> 1.2.2 merge with main data 
 use "${dir3r}/01_survey/replica/survey_targeting_r1.dta", clear
 
 cap drop m_assetsB 
 merge 1:1 ID_hogar using `assetsB', gen(m_assetsB)
 
 order $ord , after(assets_02ah)
 
 rename q08_* assets_*
 
 * Store changes 
 save "${dir3r}/01_survey/replica/survey_targeting_r1.dta" , replace
 
 
 /*==================================================
			   2: Losses cleaning
 ==================================================*/
 use "${dir2r}/survey_data/COL_SISBEN_Perdida_RD_IDB.dta", clear

 keep if resultado == 1 // keep relevant values (completed surveys)
 
 drop /*list_sample*/ pais resultado
 
 *---------- 2.1 Clenning 
 
 * Transform YES and No to dummies
 foreach v of varlist q09_01a q09_01b q09_01c q09_01d q09_01e q09_02 q09_03a q09_03b q09_03c q09_03d q09_03e q09_03f q09_03g q09_03h q09_03i q09_03j q09_03k q09_03l q09_03z q09_05 q09_06a q09_06b q09_06c q09_06d q09_06e q09_06f q09_06g q09_06h q09_06i q09_06j q09_06k q09_06l q09_06z q09_08 q09_09a q09_09b q09_09c q09_09d q09_09e q09_09f q09_09g q09_09h q09_09i q09_09j q09_09k q09_09l q09_09z {
 	replace `v' = 0 if `v' == 2
	lab define l`v' 1 "Yes" 0 "No" , modify
	lab values `v' l`v'
	replace `v' = . if `v' == 98
 }
 
 * change missings for zeros event by year if no event
 foreach v in q09_01a q09_01b q09_01c { 
	 replace `v' = 0 if q09_01d == 1
 }
 
 * set missing to zeros if event happend in other year
 egen q09_01 = rowmax(q09_01a q09_01b q09_01c)  // event in any year
 order q09_01, before(q09_01a)
 lab values q09_01 lq09_01a
 foreach v in q09_01a q09_01b q09_01c q09_01d {
 	replace `v' = 0 if `v' == . & q09_01 == 1
 }
 
 * fix zeros 2019
 foreach v of varlist q09_02 q09_03a q09_03b q09_03c q09_03d q09_03e q09_03f q09_03g q09_03h q09_03i q09_03j q09_03k q09_03l q09_03z {
 	replace `v' = 0 if `v' == . & q09_01a == 0 
 }
 
 * fix zeros 2020
 foreach v of varlist q09_05 q09_06a q09_06b q09_06c q09_06d q09_06e q09_06f q09_06g q09_06h q09_06i q09_06j q09_06k q09_06l q09_06z {
 	replace `v' = 0 if `v' == . & q09_01b == 0 
 }
 
 * fix zeros 2021
 foreach v of varlist q09_08 q09_09a q09_09b q09_09c q09_09d q09_09e q09_09f q09_09g q09_09h q09_09i q09_09j q09_09k q09_09l q09_09z {
 	replace `v' = 0 if `v' == . & q09_01c == 0 
 }

 
 *--------- 2.2 Reshaping 
 drop id_sec09
 reshape wide q09_*, i(hh_id) j(cod_09)
 
 
 *--------- 2.3 labelling 
 forv ev = 1/9{
 	if (`ev'==1)		loc evn "Accident or illness"
	if (`ev'==2)		loc evn "Death"
	if (`ev'==3)		loc evn "Separation of spouses"
	if (`ev'==4)		loc evn "bankruptcy or closure business"
	if (`ev'==5)		loc evn "Loss or cut-off of remittances"
	if (`ev'==6)		loc evn "Theft or destruction of property"
	if (`ev'==7)		loc evn "Victim of armed conflict"
	if (`ev'==8)		loc evn "Fire"
	if (`ev'==9)		loc evn "Natural disaster"
	
	// General
	lab var q09_01`ev'  "`evn' - 2019/2021"
	lab var q09_01a`ev' "`evn' - 2019"
	lab var q09_01b`ev' "`evn' - 2020"
	lab var q09_01c`ev' "`evn' - 2021"
	lab var q09_01d`ev' "`evn' - None"
	lab var q09_01e`ev' "`evn' - DNK"
	lab var q09_04`ev' 	"`evn' value - 2019"
	lab var q09_07`ev' 	"`evn' value - 2020"  
	lab var q09_10`ev' 	"`evn' value - 2021"
	
	lab var q09_02`ev'	"`evn' - 2019"
	lab var q09_05`ev'	"`evn' - 2020"
	lab var q09_08`ev'	"`evn' - 2021"
	
	// better naming 
	rename q09_01`ev' 	q09_01_0`ev' 
	rename q09_01a`ev'	q09_01_0`ev'a
	rename q09_01b`ev'	q09_01_0`ev'b
	rename q09_01c`ev'	q09_01_0`ev'c
	rename q09_01d`ev'	q09_01_0`ev'd
	rename q09_01e`ev'	q09_01_0`ev'e
	rename q09_04`ev' 	q09_04_0`ev' 
	rename q09_07`ev' 	q09_07_0`ev' 
	rename q09_10`ev' 	q09_10_0`ev' 
	rename q09_02`ev'	q09_02_0`ev'
	rename q09_05`ev'	q09_05_0`ev'
	rename q09_08`ev'	q09_08_0`ev'
	 
	// label month year
	tokenize `c(alpha)' //abcedary
	 loc year = 2019
	 foreach k in "03" "06" "09" {
		forv i = 1/12 {
			loc month : word `i' of `c(Months)'
			noi di "`month'"
			loc az = "``i''"
			lab var q09_`k'`az'`ev' " `evn' - `month' `year'  "
			rename q09_`k'`az'`ev' q09_`k'_0`ev'`az' // better naming 
		}
		lab var q09_`k'z`ev' "`evn' - DNK `year'"
		rename q09_`k'z`ev' q09_`k'_0`ev'z // better naming 
		loc ++year 
	 }
 }
 
 //>>>>>> store temporal
 rename hh_id ID_hogar
 tempfile losses
 save `losses', replace
 
 *---------- 2.2 merge with main data 
 use "${dir3r}/01_survey/replica/survey_targeting_r1.dta", clear
 
 cap drop m_losses
 merge 1:1 ID_hogar using `losses', gen(m_losses)
 
 order q09_* , after(assets_05ah)
 
 rename q09_* losses_*
 
 * Store changes 
 save "${dir3r}/01_survey/replica/survey_targeting_r1.dta" , replace
 
  
 
exit
/* End of do-file */

><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><

Notes:
1.
2.
3.


Version Control:


